/*******************************************************************************
 * Copyright (c) 2008 Scott Stanchfield.
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Public License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/legal/epl-v10.html
 *
 * Contributors:
 *   Based on the ANTLR parser generator by Terence Parr, http://antlr.org
 *   Ric Klaren <klaren@cs.utwente.nl>
 *   Scott Stanchfield - Modifications for XML Parsing
 *******************************************************************************/
package com.javadude.antxr.scanner;

import java.io.IOException;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import javax.xml.parsers.SAXParser;

import org.xml.sax.Attributes;
import org.xml.sax.DTDHandler;
import org.xml.sax.EntityResolver;
import org.xml.sax.InputSource;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
import org.xml.sax.helpers.DefaultHandler;

import com.javadude.antxr.CommonToken;
import com.javadude.antxr.Token;
import com.javadude.antxr.TokenStream;
import com.javadude.antxr.TokenStreamException;

/**
 * An XML token stream. You can pass any SAX parser, with whatever configuration
 * you want for use as the scanner.
 */
public class XMLTokenStream implements TokenStream {
    /** Indexed by token type; true for every token registered as an XML start tag by addTag. */
    private boolean[] startTag;
    /** Namespace (empty string when none) to a map of tag name to token type. */
    private Map<String, Map<String,Integer>> namespaces = new HashMap<String, Map<String,Integer>>();
    /** Token name to token type for every token name that is not an XML tag literal. */
    private Map<String, Integer> tokens = new HashMap<String, Integer>();
    /** Hand-off between the SAX thread (enqueues Tokens and Throwables) and nextToken (dequeues them). */
    private BlockingQueue<Object> blockingQueue;
    /** Token type of PCDATA, or -99 when the grammar defines no PCDATA token (text is then ignored). */
    private int pcdataNum;
    /** Text accumulated from consecutive characters() callbacks, flushed as one PCDATA token. */
    private StringBuffer currentCharacters = new StringBuffer();
    /** Line at which the text currently being accumulated started; -1 when no text is pending. */
    private int currentCharactersLine = -1;
    /** Column at which the text currently being accumulated started; -1 when no text is pending. */
    private int currentCharactersColumn = -1;
    /** Token type of XML_END_TAG; keeps the Java default of 0 if the grammar has no such token. */
    private int endTagValue;
    /** Token type of OTHER_TAG, or -1 when the grammar has no such token (unknown tags are then errors). */
    private int otherTagValue = -1;

    /**
     * Create an ungated XML token stream.
     * <p>
     * The SAX parser is allowed to run ahead of the ANTXR parser without any
     * limit, so <i>the tokens for the entire document may end up buffered in
     * memory</i>. That is the most efficient choice for small documents. For
     * large documents use the constructor that additionally takes a
     * maximumQueueSize and a resumeQueueSize.
     *
     * @param tokenNames The token names of your parser. Pass
     *                   YourParser._tokenNames, where YourParser is an XML
     *                   parser generated by ANTXR
     * @param namespaceMap The namespace/prefix mappings. Pass
     *                     YourParser.getNamespaceMap(), where YourParser is an
     *                     XML parser generated by ANTXR
     * @param in The XML InputSource containing the XML to parse
     * @param parser The SAX parser to use for scanning (and possibly
     *               validating) the XML
     * @param entityResolver An XML entity resolver for the SAX parse, or null
     *                       if none is needed
     * @param dtdHandler An XML DTD handler for the SAX parse, or null if none
     *                   is needed
     */
    public XMLTokenStream(String[] tokenNames, Map<String, String> namespaceMap, InputSource in, SAXParser parser, EntityResolver entityResolver, DTDHandler dtdHandler) {
        // -1 / -1 requests a queue without a size gate
        this(
            tokenNames,
            namespaceMap,
            in,
            parser,
            entityResolver,
            dtdHandler,
            -1,
            -1);
    }

    /**
     * Create a gated XML token stream.
     * <p>
     * The token names are indexed, the hand-off queue is created with the
     * given limits, and the SAX parse is started on a background thread
     * before this constructor returns. The queue limits let you bound how far
     * the SAX parser may run ahead of the ANTXR parser, which keeps memory
     * use in check for large documents. The six-argument constructor passes
     * -1 for both limits to request an ungated queue.
     *
     * @param tokenNames The token names of your parser. Pass
     *                   YourParser._tokenNames, where YourParser is an XML
     *                   parser generated by ANTXR
     * @param namespaceMap The namespace/prefix mappings. Pass
     *                     YourParser.getNamespaceMap(), where YourParser is an
     *                     XML parser generated by ANTXR
     * @param in The XML InputSource containing the XML to parse
     * @param parser The SAX parser to use for scanning (and possibly
     *               validating) the XML
     * @param entityResolver An XML entity resolver for the SAX parse, or null
     *                       if none is needed
     * @param dtdHandler An XML DTD handler for the SAX parse, or null if none
     *                   is needed
     * @param maximumQueueSize The maximum number of tokens to buffer for the
     *                         ANTXR parser. Once reached, the SAX parse is put
     *                         on hold until the buffer drains to
     *                         resumeQueueSize
     * @param resumeQueueSize The number of buffered tokens at which a held
     *                        SAX parse is resumed
     */
    public XMLTokenStream(String[] tokenNames, Map<String, String> namespaceMap, InputSource in, SAXParser parser, EntityResolver entityResolver, DTDHandler dtdHandler, int maximumQueueSize, int resumeQueueSize) {
        readTokens(tokenNames, namespaceMap);

        // -99 marks "the grammar has no PCDATA token"; the SAX handler then
        // skips character data altogether
        Integer pcdataId = tokens.get("PCDATA");
        pcdataNum = (pcdataId != null) ? pcdataId.intValue() : -99;

        blockingQueue = new BlockingQueue<Object>(maximumQueueSize,resumeQueueSize);
        parse(parser, in, entityResolver, dtdHandler);
    }

    /**
     * Recognizes token names that are quoted XML tag literals, for example
     * {@code "<tag>"} or {@code "<namespace:tag>"} (the double quotes are part
     * of the name). Group 2 is the namespace (null when absent; greedy, so it
     * extends to the last colon) and group 3 is the tag.
     */
    private static final Pattern TAG_TOKEN_PATTERN = Pattern.compile("\"<((.*):)?(.*)>\"");

    /**
     * Set up the tokens to use when scanning.
     * <p>
     * Every token name that is a quoted tag literal is registered as a start
     * tag under its namespace; a tag without a namespace is filed under the
     * namespace that namespaceMap associates with "$DEFAULT". All other names
     * are stored in the plain token table, and the types of XML_END_TAG and
     * OTHER_TAG are remembered when present.
     *
     * @param tokenNames The names of the tokens in the grammar; the index of a
     *                   name is its token type. Null entries are skipped
     * @param namespaceMap A mapping that includes prefixes; may be null, in
     *                     which case there is no default namespace
     */
    private void readTokens(String[] tokenNames, Map<String, String> namespaceMap) {
        startTag = new boolean[tokenNames.length];
        // Looked up once; a missing map simply means "no default namespace"
        String defaultNamespace = (namespaceMap == null) ? null : namespaceMap.get("$DEFAULT");

        for (int i = 0; i < tokenNames.length; i++) {
            String tokenName = tokenNames[i];
            if (tokenName == null) {
                continue; // nothing to register for an unnamed slot
            }
            Integer integerValue = Integer.valueOf(i);
            Matcher matcher = TAG_TOKEN_PATTERN.matcher(tokenName);
            if (matcher.matches()) {
                String namespace = matcher.group(2);
                String tag = matcher.group(3);
                if (namespace == null) {
                    namespace = defaultNamespace;
                }
                addTag(namespace, tag, integerValue);
            }
            else {
                tokens.put(tokenName, integerValue);
                if ("XML_END_TAG".equals(tokenName)) {
                    endTagValue = i;
                }
                if ("OTHER_TAG".equals(tokenName)) {
                    otherTagValue = i;
                }
            }
        }
    }

    // TODO if only one namespace, optimize further (no hashmap lookup)
    /**
     * Get the numerical token number for an XML tag.
     * <p>
     * This is a pure lookup: a namespace the grammar never mentioned yields
     * null without adding anything to the namespace table, so documents that
     * use many foreign namespaces do not grow it.
     *
     * @param namespace The tag's namespace; null is treated as the empty namespace
     * @param tag The tag name
     * @return The tag's token id, or null if the grammar does not define the tag
     */
    private Integer getTokenValue(String namespace, String tag) {
        Map<String, Integer> tags = namespaces.get(namespace == null ? "" : namespace);
        return (tags == null) ? null : tags.get(tag);
    }

    /**
     * Register an XML tag as a start-tag token.
     * <p>
     * The tag is filed under its namespace (the empty namespace when none is
     * given) and its token type is flagged as a start tag.
     *
     * @param namespace The namespace the tag belongs to, or null for none
     * @param tag The xml tag to store
     * @param integerValue The token type assigned to the tag
     */
    private void addTag(String namespace, String tag, Integer integerValue) {
        String key = (namespace != null) ? namespace : "";
        Map<String, Integer> tagsInNamespace = getTags(key);
        tagsInNamespace.put(tag, integerValue);

        int tokenType = integerValue.intValue();
        startTag[tokenType] = true;
    }

    /**
     * State whether the given token is an XML start tag.
     * <p>
     * A null token, or a token whose type lies outside the range of token
     * names this stream was created with (for example a negative type), is
     * reported as "not a start tag" rather than causing an exception.
     *
     * @param token the token to check; may be null
     * @return true if it's a start tag, false otherwise
     */
    public boolean isStartTag(Token token) {
        if (token == null) {
            return false;
        }
        int type = token.getType();
        return type >= 0 && type < startTag.length && startTag[type];
    }

    /**
     * Fetch the tag table of a namespace, creating and registering an empty
     * one the first time the namespace is seen.
     *
     * @param namespace The namespace to look up; null means the empty namespace
     * @return The (never null, mutable) map of tag names to token ids
     */
    private Map<String, Integer> getTags(String namespace) {
        String key = (namespace != null) ? namespace : "";

        Map<String, Integer> known = namespaces.get(key);
        if (known != null) {
            return known;
        }

        Map<String, Integer> created = new HashMap<String, Integer>();
        namespaces.put(key, created);
        return created;
    }

    /**
     * Launch the SAX parse on a background daemon thread.
     * <p>
     * The thread feeds the blocking queue through an ANTXRXMLHandler. Anything
     * thrown by the parse is placed in the queue as well, so that it surfaces
     * to whoever is pulling tokens.
     *
     * @param parser The SAX parser to use
     * @param in The XML to parse
     * @param entityResolver The user-defined entity resolver (or null)
     * @param dtdHandler The user-defined DTD handler (or null)
     */
    private void parse(final SAXParser parser, final InputSource in, EntityResolver entityResolver, DTDHandler dtdHandler) {
        final ANTXRXMLHandler saxHandler = new ANTXRXMLHandler(entityResolver, dtdHandler);

        Runnable scanJob = new Runnable() {
            public void run() {
                try {
                    parser.parse(in, saxHandler);
                }
                catch (Throwable failure) {
                    // hand the problem to the consumer instead of losing it on this thread
                    blockingQueue.enqueue(failure);
                }
            }
        };

        Thread worker = new Thread(scanJob, "saxParserCreatingXMLTokens");
        worker.setDaemon(true);
        worker.start();
    }

    /** The EOF token once it has been handed out; non-null means the SAX parse ended normally. */
    private Token deliveredEof;

    /** The failure queued by the SAX thread once it has been seen; non-null means the parse aborted. */
    private Throwable parseFailure;

    /**
     * {@inheritDoc}
     * <p>
     * Takes the next item off the queue filled by the SAX thread. A queued
     * Throwable (a failed parse) is reported as a TokenStreamException whose
     * message contains the stack trace and, for a SAXParseException, the line
     * and column.
     * <p>
     * The EOF token and a parse failure are both the last thing the SAX
     * thread ever queues, so once either has been seen it is remembered and
     * replayed on every later call rather than waiting on a queue that will
     * never be filled again.
     *
     * @return the next token; the EOF token repeatedly once the document is exhausted
     * @throws TokenStreamException if the SAX parse failed or the token could
     *         not be fetched
     */
    public Token nextToken() throws TokenStreamException {
        if (deliveredEof != null) {
            return deliveredEof;
        }

        Throwable failure = parseFailure;
        if (failure == null) {
            try {
                Object next = blockingQueue.dequeue();
                if (!(next instanceof Throwable)) {
                    Token token = (Token) next;
                    if (token != null && token.getType() == Token.EOF_TYPE) {
                        deliveredEof = token;
                    }
                    return token;
                }
                failure = (Throwable) next;
                parseFailure = failure;
            }
            catch (Throwable t) {
                if (t instanceof InterruptedException) {
                    // keep the interrupt visible to the caller's thread
                    Thread.currentThread().interrupt();
                }
                failure = t;
            }
        }
        throw toTokenStreamException(failure);
    }

    /**
     * Wrap a failure in a TokenStreamException, keeping the stack trace in the
     * message and attaching the failure as the cause.
     *
     * @param failure the problem to report
     * @return the exception to throw to the ANTXR parser
     */
    private TokenStreamException toTokenStreamException(Throwable failure) {
        StringWriter trace = new StringWriter();
        PrintWriter out = new PrintWriter(trace);
        failure.printStackTrace(out);
        out.close();

        String lineCol = "";
        if (failure instanceof SAXParseException) {
            SAXParseException se = (SAXParseException) failure;
            lineCol = " (line " + se.getLineNumber() + " col " + se.getColumnNumber() + ")";
        }

        TokenStreamException wrapped = new TokenStreamException("Error during XML parse" + lineCol + ':' + trace);
        try {
            wrapped.initCause(failure);
        }
        catch (IllegalStateException ignored) {
            // the exception class already fixed its cause; the message still carries the trace
        }
        return wrapped;
    }

    /**
     * The SAX handler that glues the SAX parser to our blocking queue.
     * This class grabs notifications of tags from the SAX parser, creates
     * ANTXR tokens from them, and stuffs the tokens in the blocking queue.
     * The nextToken method returns tokens off the queue when asked.
     *
     * If the caller passes in a DTD and/or entity resolver, we delegate to
     * them when appropriate during the SAX parse.
     *
     * Token positions come from the SAX document locator. A parser is not
     * obliged to supply one; when it does not, positions are reported as -1.
     */
    class ANTXRXMLHandler extends DefaultHandler {
        /** Position source supplied by the SAX parser; stays null if the parser never provides one. */
        private Locator locator;
        /** User-supplied entity resolver to delegate to, or null. */
        private final EntityResolver entityResolver;
        /** User-supplied DTD handler to delegate to, or null. */
        private final DTDHandler dtdHandler;

        /**
         * Create the handler
         * @param entityResolver A user-defined entity resolver to delegate to (or null)
         * @param dtdHandler A user-defined dtd handler to delegate to (or null)
         */
        public ANTXRXMLHandler(EntityResolver entityResolver, DTDHandler dtdHandler) {
            this.entityResolver = entityResolver;
            this.dtdHandler = dtdHandler;
        }

        /** {@inheritDoc} */
        @Override
        public void setDocumentLocator(Locator locator) {
            this.locator = locator;
        }

        /**
         * @return the line the SAX parser is currently at, or -1 if no locator was supplied
         */
        private int currentLine() {
            return (locator == null) ? -1 : locator.getLineNumber();
        }

        /**
         * @return the column the SAX parser is currently at, or -1 if no locator was supplied
         */
        private int currentColumn() {
            return (locator == null) ? -1 : locator.getColumnNumber();
        }

        /**
         * Record the parser's current position on a token.
         * @param token the token to stamp
         */
        private void stampLocation(Token token) {
            token.setLine(currentLine());
            token.setColumn(currentColumn());
        }

        /**
         * Collect all adjacent character chunks into a single PCDATA to
         * return to the parser. The position of the first chunk is remembered
         * as the position of the eventual token.
         */
        @Override
        public void characters(char[] ch, int start, int length) throws SAXException {
            // if PCDATA isn't used in the parser, don't collect characters
            if (pcdataNum == -99) {
                return;
            }
            if (currentCharactersLine == -1) {
                currentCharactersLine = currentLine();
                currentCharactersColumn = currentColumn();
            }
            currentCharacters.append(ch, start, length);
        }

        /**
         * Finish our PCDATA and send it to the parser. Text that consists of
         * whitespace only is discarded rather than queued.
         */
        protected void finishCharacters() {
            // if PCDATA isn't used in the parser, don't collect characters
            if (pcdataNum == -99) {
                return;
            }
            int line = currentCharactersLine;
            int column = currentCharactersColumn;
            String characters = currentCharacters.toString();

            // reset the accumulator before anything is queued
            currentCharactersLine = -1;
            currentCharactersColumn = -1;
            currentCharacters.setLength(0);

            if (characters.trim().length() == 0) {
                return;
            }

            Token token = new CommonToken(pcdataNum, characters);
            token.setLine(line);
            token.setColumn(column);
            blockingQueue.enqueue(token);
        }

        /** {@inheritDoc} */
        @Override
        public void endElement(String uri, String localName, String qName) throws SAXException {
            finishCharacters(); // if we were working on a PCDATA, send it!

            // queue an XML_END_TAG token
            Token token = new CommonToken(endTagValue, "");
            stampLocation(token);
            blockingQueue.enqueue(token);
        }

        /** {@inheritDoc} */
        @Override
        public void endDocument() throws SAXException {
            finishCharacters(); // if we were working on a PCDATA, send it!

            // queue an EOF_TOKEN
            // NOTE(review): SAX only promises correct locator results before
            // endDocument is called, so this position is best-effort
            CommonToken eofToken = new CommonToken(Token.EOF_TYPE, "");
            stampLocation(eofToken);
            blockingQueue.enqueue(eofToken);
        }

        /** {@inheritDoc} */
        @Override
        public void error(SAXParseException e) throws SAXException {
            finishCharacters(); // if we were working on a PCDATA, send it!
            throw e;
        }

        /** {@inheritDoc} */
        @Override
        public void fatalError(SAXParseException e) throws SAXException {
            finishCharacters(); // if we were working on a PCDATA, send it!
            throw e;
        }

        /**
         * {@inheritDoc}
         * <p>
         * The warning is rethrown, so a warning ends the parse just like an error.
         */
        @Override
        public void warning(SAXParseException e) throws SAXException {
            finishCharacters(); // if we were working on a PCDATA, send it!
            throw e;
        }

        /** {@inheritDoc} */
        @Override
        public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
            finishCharacters(); // if we were working on a PCDATA, send it!
            // queue a start tag token for the tag; without a local name,
            // fall back to the qualified name
            if (localName == null || "".equals(localName)) {
                localName = qName;
            }
            blockingQueue.enqueue(createXMLToken(uri, localName, attributes));
        }

        /**
         * Create an XML token.
         * <p>
         * The token type is the one the grammar assigns to the tag; a tag the
         * grammar does not know is mapped to OTHER_TAG when the grammar
         * defines that token. The token text is the tag name, prefixed with
         * "namespace:" when the tag has a non-blank namespace.
         *
         * @param uri The namespace of the tag
         * @param localName The local name of the tag
         * @param attributes The tag attributes (may be null)
         * @return An XMLToken
         * @throws SAXException If the tag is not defined in the grammar and the
         *                      grammar has no OTHER_TAG token
         */
        private XMLToken createXMLToken(String uri, String localName, Attributes attributes) throws SAXException {
            Integer id = getTokenValue(uri, localName);
            String name = "";
            if (uri != null && !"".equals(uri.trim())) {
                name += uri + ":";
            }
            name += localName;
            int tokenValue;
            if (id != null) {
                tokenValue = id.intValue();
            }
            else if (otherTagValue != -1) {
                tokenValue = otherTagValue;
            }
            else {
                throw new SAXException("Tag '" + name + "' not defined in parser grammar");
            }

            List<Attribute> attributeList;

            if (attributes == null || attributes.getLength() == 0) {
                attributeList = Collections.emptyList();
            } else {
                int count = attributes.getLength();
                attributeList = new ArrayList<Attribute>(count);
                for (int i = 0; i < count; i++) {
                    String localAttributeName = attributes.getLocalName(i);
                    // without a local name, fall back to the qualified name
                    if (localAttributeName == null || "".equals(localAttributeName)) {
                        localAttributeName = attributes.getQName(i);
                    }
                    String namespace = attributes.getURI(i);
                    String value = attributes.getValue(i);
                    String type = attributes.getType(i);
                    attributeList.add(new Attribute(namespace, localAttributeName, value, type));
                }
            }

            XMLToken token = new XMLToken(tokenValue, name, attributeList);
            stampLocation(token);
            return token;
        }

        /** {@inheritDoc} */
        @Override
        public void notationDecl(String name, String publicId, String systemId) throws SAXException {
            // If we have an explicit DTD handler, delegate to it
            if (dtdHandler != null) {
                dtdHandler.notationDecl(name, publicId, systemId);
            } else {
                super.notationDecl(name, publicId, systemId);
            }
        }

        /** {@inheritDoc} */
        @Override
        public void unparsedEntityDecl(String name, String publicId, String systemId, String notationName) throws SAXException {
            // If we have an explicit DTD handler, delegate to it
            if (dtdHandler != null) {
                dtdHandler.unparsedEntityDecl(name, publicId, systemId, notationName);
            } else {
                super.unparsedEntityDecl(name, publicId, systemId, notationName);
            }
        }

        /**
         * {@inheritDoc}
         * <p>
         * Delegates to the user-supplied entity resolver when there is one.
         * An IOException is wrapped in a SAXException; a SAXException is
         * passed through unchanged.
         */
        @Override
        public InputSource resolveEntity(String publicId, String systemId) throws SAXException {
            try {
                // If we have an explicit entity resolver, delegate to it
                if (entityResolver != null) {
                    return entityResolver.resolveEntity(publicId, systemId);
                }
                return super.resolveEntity(publicId, systemId);
            }
            catch (IOException e) {
                throw new SAXException(e);
            }
        }
    }
}
